\contentsline {chapter}{\numberline {1}Requirements}{7}{chapter.1}
\contentsline {section}{\numberline {1.1}Functional requirements}{7}{section.1.1}
\contentsline {subsection}{\numberline {1.1.1}Document preprocessing}{7}{subsection.1.1.1}
\contentsline {subsection}{\numberline {1.1.2}Linguistic processing}{7}{subsection.1.1.2}
\contentsline {subsection}{\numberline {1.1.3}Process linguistic results}{7}{subsection.1.1.3}
\contentsline {subsection}{\numberline {1.1.4}Search rules}{7}{subsection.1.1.4}
\contentsline {subsection}{\numberline {1.1.5}Create knowledge representation and store it}{8}{subsection.1.1.5}
\contentsline {subsection}{\numberline {1.1.6}Create search engine}{8}{subsection.1.1.6}
\contentsline {section}{\numberline {1.2}Other requirements}{8}{section.1.2}
\contentsline {section}{\numberline {1.3}Experimental evaluation}{8}{section.1.3}
\contentsline {chapter}{\numberline {2}What is Information extraction}{10}{chapter.2}
\contentsline {section}{\numberline {2.1}Basics}{10}{section.2.1}
\contentsline {section}{\numberline {2.2}Motivation}{10}{section.2.2}
\contentsline {section}{\numberline {2.3}IE origin}{11}{section.2.3}
\contentsline {section}{\numberline {2.4}Recent IE systems}{12}{section.2.4}
\contentsline {section}{\numberline {2.5}What is information}{13}{section.2.5}
\contentsline {subsection}{\numberline {2.5.1}Named entity recognition}{13}{subsection.2.5.1}
\contentsline {subsection}{\numberline {2.5.2}Relationship extraction and entity recognition}{14}{subsection.2.5.2}
\contentsline {subsection}{\numberline {2.5.3}Co-reference resolution}{14}{subsection.2.5.3}
\contentsline {subsection}{\numberline {2.5.4}Semi-structured IE}{14}{subsection.2.5.4}
\contentsline {subsection}{\numberline {2.5.5}Language and vocabulary analysis}{15}{subsection.2.5.5}
\contentsline {subsection}{\numberline {2.5.6}Summary}{15}{subsection.2.5.6}
\contentsline {chapter}{\numberline {3}Description of the provided documents}{16}{chapter.3}
\contentsline {section}{\numberline {3.1}Documents}{16}{section.3.1}
\contentsline {subsection}{\numberline {3.1.1}Field of experience summary}{16}{subsection.3.1.1}
\contentsline {subsection}{\numberline {3.1.2}Experience records}{16}{subsection.3.1.2}
\contentsline {subsection}{\numberline {3.1.3}Experts' profiles}{16}{subsection.3.1.3}
\contentsline {subsection}{\numberline {3.1.4}Substitution program}{17}{subsection.3.1.4}
\contentsline {subsection}{\numberline {3.1.5}Knowledge management concept}{17}{subsection.3.1.5}
\contentsline {subsection}{\numberline {3.1.6}Forms}{17}{subsection.3.1.6}
\contentsline {subsection}{\numberline {3.1.7}Yearly reports}{17}{subsection.3.1.7}
\contentsline {subsection}{\numberline {3.1.8}General presentations}{17}{subsection.3.1.8}
\contentsline {subsection}{\numberline {3.1.9}Reports}{17}{subsection.3.1.9}
\contentsline {subsection}{\numberline {3.1.10}Experience records from KM}{17}{subsection.3.1.10}
\contentsline {subsection}{\numberline {3.1.11}Management documentation}{18}{subsection.3.1.11}
\contentsline {subsection}{\numberline {3.1.12}General enterprise documentation}{18}{subsection.3.1.12}
\contentsline {subsection}{\numberline {3.1.13}Supplier's training}{18}{subsection.3.1.13}
\contentsline {section}{\numberline {3.2}Analysis from the NLP perspective}{18}{section.3.2}
\contentsline {section}{\numberline {3.3}Analysis from the IE perspective}{19}{section.3.3}
\contentsline {chapter}{\numberline {4}Natural language processing}{20}{chapter.4}
\contentsline {section}{\numberline {4.1}Motivation}{20}{section.4.1}
\contentsline {section}{\numberline {4.2}Analysis of Natural language text processing}{21}{section.4.2}
\contentsline {paragraph}{}{21}{section*.6}
\contentsline {section}{\numberline {4.3}NLP tool Treex}{22}{section.4.3}
\contentsline {subsection}{\numberline {4.3.1}Running Treex}{23}{subsection.4.3.1}
\contentsline {section}{\numberline {4.4}What carries information in a sentence}{25}{section.4.4}
\contentsline {subsection}{\numberline {4.4.1}Simple sentence containing verb}{25}{subsection.4.4.1}
\contentsline {subsection}{\numberline {4.4.2}Ellipsis}{26}{subsection.4.4.2}
\contentsline {subsection}{\numberline {4.4.3}Coordination}{26}{subsection.4.4.3}
\contentsline {subsection}{\numberline {4.4.4}Parenthesis}{27}{subsection.4.4.4}
\contentsline {subsection}{\numberline {4.4.5}Complex phenomena}{27}{subsection.4.4.5}
\contentsline {chapter}{\numberline {5}Information Extraction}{28}{chapter.5}
\contentsline {section}{\numberline {5.1}Dependency patterns}{28}{section.5.1}
\contentsline {subsection}{\numberline {5.1.1}Predicate-Argument model (SVO)}{29}{subsection.5.1.1}
\contentsline {subsection}{\numberline {5.1.2}Chain model}{29}{subsection.5.1.2}
\contentsline {subsection}{\numberline {5.1.3}Linked Chain model}{29}{subsection.5.1.3}
\contentsline {subsection}{\numberline {5.1.4}Unconstrained Linked Chain model (ULC)}{29}{subsection.5.1.4}
\contentsline {subsection}{\numberline {5.1.5}Shortest Path model}{30}{subsection.5.1.5}
\contentsline {subsection}{\numberline {5.1.6}Subtree model}{30}{subsection.5.1.6}
\contentsline {section}{\numberline {5.2}Analysis of patterns and possible solutions}{30}{section.5.2}
\contentsline {subsection}{\numberline {5.2.1}Named entity detection and relation extraction}{30}{subsection.5.2.1}
\contentsline {subsection}{\numberline {5.2.2}Co-reference resolution}{31}{subsection.5.2.2}
\contentsline {subsection}{\numberline {5.2.3}Coordination}{31}{subsection.5.2.3}
\contentsline {subsection}{\numberline {5.2.4}Terminology extraction}{32}{subsection.5.2.4}
\contentsline {section}{\numberline {5.3}Summary}{32}{section.5.3}
\contentsline {chapter}{\numberline {6}IE evaluation}{34}{chapter.6}
\contentsline {section}{\numberline {6.1}Quantitative results}{34}{section.6.1}
\contentsline {section}{\numberline {6.2}Discovered NLP problems}{35}{section.6.2}
\contentsline {subsection}{\numberline {6.2.1}Enumerations}{35}{subsection.6.2.1}
\contentsline {subsection}{\numberline {6.2.2}Terms specification}{36}{subsection.6.2.2}
\contentsline {section}{\numberline {6.3}Summary}{36}{section.6.3}
\contentsline {chapter}{\numberline {7}Describing and storing the information}{37}{chapter.7}
\contentsline {section}{\numberline {7.1}Linked Data principles}{37}{section.7.1}
\contentsline {subsection}{\numberline {7.1.1}RDF definition}{38}{subsection.7.1.1}
\contentsline {subsection}{\numberline {7.1.2}RDF Schema definition}{39}{subsection.7.1.2}
\contentsline {subsection}{\numberline {7.1.3}How to code extracted information}{39}{subsection.7.1.3}
\contentsline {section}{\numberline {7.2}Modeling knowledge}{39}{section.7.2}
\contentsline {subsection}{\numberline {7.2.1}URI Prefix}{40}{subsection.7.2.1}
\contentsline {subsection}{\numberline {7.2.2}Representing a subject, predicate, object}{40}{subsection.7.2.2}
\contentsline {subsection}{\numberline {7.2.3}Terminology representation}{41}{subsection.7.2.3}
\contentsline {subsection}{\numberline {7.2.4}Document representation}{41}{subsection.7.2.4}
\contentsline {section}{\numberline {7.3}Searching for a proper database}{42}{section.7.3}
\contentsline {section}{\numberline {7.4}Summary}{42}{section.7.4}
\contentsline {chapter}{\numberline {8}Search Engine}{44}{chapter.8}
\contentsline {section}{\numberline {8.1}Available information}{44}{section.8.1}
\contentsline {section}{\numberline {8.2}Search specification}{44}{section.8.2}
\contentsline {subsection}{\numberline {8.2.1}Search based on triples match}{44}{subsection.8.2.1}
\contentsline {subsection}{\numberline {8.2.2}Search based on terminology match}{45}{subsection.8.2.2}
\contentsline {subsection}{\numberline {8.2.3}Relevance of the terminology}{46}{subsection.8.2.3}
\contentsline {subsection}{\numberline {8.2.4}Non-document search specification}{47}{subsection.8.2.4}
\contentsline {section}{\numberline {8.3}SPARQL Definition}{47}{section.8.3}
\contentsline {chapter}{\numberline {9}Implementation}{49}{chapter.9}
\contentsline {section}{\numberline {9.1}Decomposition}{49}{section.9.1}
\contentsline {section}{\numberline {9.2}Architecture}{50}{section.9.2}
\contentsline {subsection}{\numberline {9.2.1}Model}{50}{subsection.9.2.1}
\contentsline {subsection}{\numberline {9.2.2}View and Controller}{51}{subsection.9.2.2}
\contentsline {chapter}{\numberline {10}Implementation of IE}{52}{chapter.10}
\contentsline {section}{\numberline {10.1}Plain Text Extraction}{52}{section.10.1}
\contentsline {subsection}{\numberline {10.1.1}Implementation}{53}{subsection.10.1.1}
\contentsline {subsection}{\numberline {10.1.2}Observation}{53}{subsection.10.1.2}
\contentsline {section}{\numberline {10.2}Linguistic processing}{54}{section.10.2}
\contentsline {subsection}{\numberline {10.2.1}Implementation}{54}{subsection.10.2.1}
\contentsline {section}{\numberline {10.3}Observation}{55}{section.10.3}
\contentsline {section}{\numberline {10.4}Document Tree Structure}{55}{section.10.4}
\contentsline {subsection}{\numberline {10.4.1}Implementation}{55}{subsection.10.4.1}
\contentsline {section}{\numberline {10.5}IE implementation}{56}{section.10.5}
\contentsline {subsection}{\numberline {10.5.1}Search rules and search process}{57}{subsection.10.5.1}
\contentsline {subsection}{\numberline {10.5.2}Writing search configuration}{58}{subsection.10.5.2}
\contentsline {chapter}{\numberline {11}Database Implementation}{60}{chapter.11}
\contentsline {section}{\numberline {11.1}Ontology}{60}{section.11.1}
\contentsline {section}{\numberline {11.2}Working with Virtuoso database}{61}{section.11.2}
\contentsline {section}{\numberline {11.3}Converting and storing extracted information into a graph}{61}{section.11.3}
\contentsline {subsection}{\numberline {11.3.1}Selecting extracted knowledge to store}{61}{subsection.11.3.1}
\contentsline {subsection}{\numberline {11.3.2}Storing extracted knowledge into the database}{61}{subsection.11.3.2}
\contentsline {section}{\numberline {11.4}Implementation}{63}{section.11.4}
\contentsline {chapter}{\numberline {12}Search Implementation}{64}{chapter.12}
\contentsline {section}{\numberline {12.1}Search requirements}{64}{section.12.1}
\contentsline {section}{\numberline {12.2}Implementation}{64}{section.12.2}
\contentsline {subsection}{\numberline {12.2.1}Search for triples}{65}{subsection.12.2.1}
\contentsline {subsection}{\numberline {12.2.2}Search for resource}{66}{subsection.12.2.2}
\contentsline {subsection}{\numberline {12.2.3}Document search based on a triple}{66}{subsection.12.2.3}
\contentsline {subsection}{\numberline {12.2.4}Document search based on a terminology}{66}{subsection.12.2.4}
\contentsline {subsection}{\numberline {12.2.5}Document search based on a triple and a terminology}{67}{subsection.12.2.5}
\contentsline {chapter}{\numberline {13}User Interface}{68}{chapter.13}
\contentsline {section}{\numberline {13.1}Overview}{68}{section.13.1}
\contentsline {section}{\numberline {13.2}Triples and terminologies}{68}{section.13.2}
\contentsline {section}{\numberline {13.3}Processing documents}{69}{section.13.3}
\contentsline {section}{\numberline {13.4}Examples}{69}{section.13.4}
\contentsline {chapter}{\numberline {14}Qualitative results}{73}{chapter.14}
\contentsline {chapter}{Conclusion}{74}{chapter*.7}
\contentsline {chapter}{Bibliography}{75}{chapter*.8}
\contentsline {chapter}{List of Tables}{79}{chapter*.9}
\contentsline {chapter}{List of Figures}{80}{chapter*.10}
\contentsline {chapter}{List of Abbreviations}{81}{chapter*.11}
\contentsline {chapter}{\numberline {A}Appendix - Treex Installation}{82}{Appendix.a.A}
\contentsline {paragraph}{Missing packages, modules:}{82}{section*.12}
\contentsline {chapter}{\numberline {B}Appendix - Application installation}{83}{Appendix.b.B}
\contentsline {paragraph}{Installation of Virtuoso database}{83}{section*.13}
\contentsline {paragraph}{Installation of Application server}{83}{section*.14}
\contentsline {paragraph}{Compiling and running treexServiceServer module}{83}{section*.15}
\contentsline {paragraph}{Compiling and deploying application}{83}{section*.16}
